In [None]:
import math
from pathlib import Path

from matplotlib import pyplot as plt
from torch import empty
from torch.random import manual_seed

import modules as m
from data import generate_data
from helpers import optimize, pickle_dump, pickle_load

# Seed torch's random number generator before any data is generated.
manual_seed(42)

# Load IPython's autoreload extension in mode 2.
%load_ext autoreload
%autoreload 2

In [None]:
# Experiment configuration shared by the cells below:
#   num_samples -- passed to generate_data and used as the largest batch size
#   epochs      -- forwarded to optimize() for every tuning run
num_samples, epochs = 1000, 100

# generate_data hands back a (train, test) pair.
train_data, test_data = generate_data(num_samples)

In [None]:
def tune_model(model):
    """Pick the best learning rate for each batch size.

    For every batch size, `optimize` is run once per candidate learning rate
    and the run whose test loss reaches the lowest minimum is kept.

    Reads the notebook globals `train_data`, `test_data`, `epochs` and
    `num_samples`.

    Args:
        model: the model handed to `optimize` for every run.

    Returns:
        A list with one tuple per batch size:
        (batch_size, best_learning_rate, best_batch_losses,
         best_test_losses, best_train_losses).
    """
    # Eight learning rates doubling from 0.01 up to 1.28. Built in plain
    # Python so the cell does not depend on numpy being imported.
    learning_rates = [0.01 * 2**k for k in range(8)]
    batch_sizes = [1, 10, 25, 50, 100, 200, 500, num_samples]

    results = []
    for batch_size in batch_sizes:
        # Reset every "best" slot so nothing leaks over from the previous
        # batch size.
        best_loss = best_learning_rate = None
        best_batch_losses = best_test_losses = best_train_losses = None
        for learning_rate in learning_rates:
            # NOTE(review): the same `model` object is passed to every run;
            # this assumes `optimize` re-initialises its parameters -- confirm,
            # otherwise later runs start from already-trained weights.
            batch_losses, train_losses, test_losses = optimize(
                model,
                train_data,
                test_data,
                epochs=epochs,
                batch_size=batch_size,
                lr=learning_rate,
                verbose=False
            )
            best_loss_achieved = test_losses.min().item()
            # `x < nan` is always False, so a NaN recorded for the first run
            # would otherwise never be replaced by a later finite loss.
            improved = (
                best_loss is None
                or best_loss_achieved < best_loss
                or (math.isnan(best_loss) and not math.isnan(best_loss_achieved))
            )
            if improved:
                best_loss = best_loss_achieved
                best_learning_rate = learning_rate
                best_batch_losses = batch_losses
                best_test_losses = test_losses
                best_train_losses = train_losses
        results.append((batch_size, best_learning_rate, best_batch_losses, best_test_losses, best_train_losses))
    return results

In [None]:
# Reuse previously pickled tuning results when they exist; otherwise build the
# network, run the tuning sweep and cache its outcome under `filename`.
filename = 'results'
try:
    results = pickle_load(filename)
except FileNotFoundError:
    input_dim, output_dim, nb_hidden = 2, 1, 25

    # NOTE(review): the two middle Linear layers receive a single argument;
    # presumably the output size defaults to the input size -- confirm against
    # modules.Linear.
    layers = [
        m.Linear(input_dim, nb_hidden),
        m.Tanh(),
        m.Linear(nb_hidden),
        m.Tanh(),
        m.Linear(nb_hidden),
        m.Tanh(),
        m.Linear(nb_hidden, output_dim),
        m.Sigmoid(),
    ]
    model = m.Sequential(*layers)

    results = tune_model(model)
    pickle_dump(filename, results)

In [None]:
def best_so_far(tensor):
    """Return the running minimum of `tensor` as a new numpy array.

    Element i of the result is the smallest value among elements 0..i of the
    input. Entries that do not compare greater than the running minimum
    (including NaN) are left as they are.

    Args:
        tensor: an object exposing `.numpy()` (e.g. a CPU torch tensor),
            iterated along its first dimension.

    Returns:
        A numpy array with the same shape as the input. The input is not
        modified.
    """
    # `.numpy()` shares memory with the tensor; copy so that writing into the
    # array does not overwrite the caller's loss history.
    ndarr = tensor.numpy().copy()
    for i in range(1, len(ndarr)):
        if ndarr[i-1] < ndarr[i]:
            ndarr[i] = ndarr[i-1]
    return ndarr

In [None]:
# Running-best training loss after every minibatch, one curve per batch size.
# The y axis shows the loss itself on a logarithmic scale.

# savefig does not create missing directories, so make sure the target exists.
Path('plots').mkdir(parents=True, exist_ok=True)

fig, ax = plt.subplots(figsize=(9, 5))
ax.set_yscale('log')
# Unused fields get real names instead of `_`, which IPython uses for the
# last cell output.
for batch_size, lr, batch_losses, _test_losses, _train_losses in results:
    # The k-th minibatch loss is recorded after k * batch_size samples.
    x = range(batch_size, batch_size * len(batch_losses) + batch_size, batch_size)
    ax.plot(x, best_so_far(batch_losses), label=batch_size)

ax.set_title('Training loss measured after each minibatch')
ax.set_xlabel('Number of samples seen')
ax.set_ylabel('log(loss)')
ax.legend(title='Batch size')
fig.savefig('plots/loss-per-minibatch');

In [None]:
# Running-best training loss after every epoch, one curve per batch size.
# The y axis shows the loss itself on a logarithmic scale.

# savefig does not create missing directories, so make sure the target exists.
Path('plots').mkdir(parents=True, exist_ok=True)

fig, ax = plt.subplots(figsize=(9, 5))
ax.set_yscale('log')
# Unused fields get real names instead of `_`, which IPython uses for the
# last cell output.
for batch_size, lr, _batch_losses, _test_losses, train_losses in results:
    N = num_samples
    # Size the x axis from the recorded losses rather than the global
    # `epochs`: `results` may come from the pickle cache and hold a different
    # number of epochs than the current setting.
    epochs_recorded = len(train_losses)
    x = range(N, epochs_recorded * N + N, N)
    ax.plot(x, best_so_far(train_losses), label=batch_size)

ax.set_title('Training loss measured after each epoch')
ax.set_xlabel('Number of samples seen')
ax.set_ylabel('log(loss)')
ax.legend(title='Batch size')
fig.savefig('plots/loss-per-epoch');